# Collects entries from a hot-search (trending topics) ranking site.

import requests
from lxml import etree

# Page that is fetched and scraped.
url = "http://hot.meibp.com/"

# Cookies sent along with the request.
# NOTE(review): these look like values captured from a browser session and
# may be stale -- confirm whether the site actually requires them.
cookies = {
    "Hm_lvt_1d9b8e4e110b54c48922093ef42f94fe": "1647522958",
    "PHPSESSID": "e5ne2vg34tkfkjseuduod1q5ss",
    "Hm_lpvt_1d9b8e4e110b54c48922093ef42f94fe": "1647523063",
    "UM_distinctid": (
        "17f9806e4e4886-0e3b4c1d996d63-977173c-1fa400-17f9806e4e535f"
    ),
    "CNZZDATA1278227787": "951014879-1647514960-%7C1647514960",
}

# Request headers imitating a desktop Chrome browser.
headers = {
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.4844.51 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9",
}

# Fetch the ranking page. The timeout keeps the script from blocking forever
# when the server does not answer.
# NOTE(review): verify=False switches off TLS certificate verification. It has
# no effect on the plain-http URL but would apply if the server redirects to
# https -- confirm it is still needed and drop it if not.
response = requests.get(
    url, cookies=cookies, headers=headers, verify=False, timeout=10
)
# Fail loudly on 4xx/5xx instead of silently scraping an error page.
response.raise_for_status()

# When the server sends no charset, requests falls back to ISO-8859-1 for
# text responses, which garbles Chinese text. In that case use the encoding
# detected from the body instead.
if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
    response.encoding = response.apparent_encoding

html = etree.HTML(response.text)
# etree.HTML may return None for an empty document; treat that as "no data".
if html is None:
    divs = []
else:
    # Each selected <div> is one category column of the ranking page.
    divs = html.xpath('//div[@class="items"]/div[@class="row"]/div')

for div in divs:
    # The category name is the title attribute of the column's direct <a>
    # child; compute it once per column rather than once per entry.
    category = "".join(div.xpath('./a/@title'))
    for a in div.xpath('./div/div/a'):
        result = {
            "热搜类别": category,
            "标题": "".join(a.xpath('./@title')),
            "链接": "".join(a.xpath('./@href'))
        }
        print(result)